To process Agilent microarray data, I needed to install a few packages used for importing and analyzing the data. These included Bioconductor as well as Linear Models for Microarray Data (limma). Both are popular tools for processing microarray data.
## For installing Bioconductor on R
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install(version = "3.14")
## For installing limma on R
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("limma")
After installing the packages above, I loaded them with the library() function so that I could access their functions.
library(limma)
library(dplyr)
library(statmod)
Now that the libraries were loaded, I was able to import the raw data. I started by importing the runsheet into R. It contains the sample metadata needed later in the pipeline.
# Project root; the runsheet and the output folders are resolved relative to it.
dir <- "C:/Users/tangk/Desktop/Agilent_Microarray"

# Read the runsheet into a data frame. check.names = FALSE keeps the column
# headers exactly as they are written in the file.
GLDS_174_rs <- read.csv(
  file = file.path(dir, "Runsheet(GLDS-174).csv"),
  check.names = FALSE,
  fileEncoding = "UTF-8-BOM"
)

# Keep only the two experimental factor columns (sex and exposure duration).
Factor_Value <- GLDS_174_rs[, c("Factor Value[sex]", "Factor Value[exposure duration]")]
Here, I imported the raw data from the text files into R using the read.maimages() function from the limma package.
# Folder that holds the input data sets.
datadir <- "C:/Users/tangk/Desktop/Agilent_Microarray/input_data"

# Folder with the combined raw Agilent text exports for GLDS-174; built once
# and reused for both the file listing and the import.
raw_data_dir <- file.path(datadir, "GLDS-174/GLDS-174_Raw_Data_Combined")

# `pattern` is a regular expression, not a glob, so it must not start with "*".
# list.files() is spelled out because `dir` is also a variable in this script.
files <- list.files(path = raw_data_dir, pattern = "\\.txt$")
if (length(files) == 0) {
  stop("No .txt files found in ", raw_data_dir, call. = FALSE)
}

## Input: ".txt" files, Output: EListRaw (Raw expression levels)
raw_data <- limma::read.maimages(
  files,
  source = "agilent",
  path = raw_data_dir,
  sep = "\t",
  green.only = TRUE
)
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930654_1p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930655_1p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930656_1p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930657_1p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930658_1p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930659_1p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930660_3p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930661_3p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930662_3p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930663_3p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930664_3p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930665_3p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930666_4p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930667_4p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930668_4p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930669_4p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930670_4p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930671_4p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930672_6p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930673_6p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930674_6p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930675_6p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930676_6p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930677_6p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930678_7p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930679_7p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930680_7p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930681_7p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930682_7p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930683_7p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930684_8p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930685_8p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930686_8p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930687_8p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930688_8p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930689_8p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930690_9p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930691_9p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930692_9p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930693_9p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930694_9p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930695_9p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930696_10p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930697_10p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930698_10p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930699_10p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930700_10p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930701_10p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930702_11p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930703_11p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930704_11p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930705_11p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930706_11p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930707_11p6.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930708_12p1.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930709_12p2.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930710_12p3.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930711_12p4.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930712_12p5.txt
## Read C:/Users/tangk/Desktop/Agilent_Microarray/input_data/GLDS-174/GLDS-174_Raw_Data_Combined/GSM1930713_12p6.txt
In this step, I inspected the raw data to make sure that it was in the correct format so that it could be processed correctly further down the pipeline.
# Overlay the density curves of the raw intensities of all arrays in one plot,
# using limma's plotDensities().
plotDensities(
  raw_data,
  log = TRUE,
  legend = "topright",
  main = "Density of raw expression values for multiple arrays"
)
## Warning in plotDensities.EListRaw(raw_data, log = TRUE, legend = "topright", :
## NaNs produced
# This code block uses the imageplot() function to draw a pseudo-image of each sample so that spatial abnormalities in the expression values on the chip can be spotted.
# num_rows is the row count of the raw data object; its factors are used further down to pick a rows x columns layout when raw_data carries no printer layout.
num_rows <- nrow(raw_data)
# Return every positive divisor of `num`, in increasing order, as a list.
#
# Args:
#   num: A single non-negative whole number.
# Returns:
#   A list of integers (one element per divisor); an empty list when `num` is 0.
find_factors <- function(num) {
  # seq_len() (unlike 1:num) is empty for 0, so 0 %% 0 is never evaluated.
  candidates <- seq_len(num)
  # One vectorised modulo test instead of growing a list inside a loop.
  as.list(candidates[num %% candidates == 0])
}
factors <- find_factors(num_rows)

# The chip layout is the same for every sample, so it is resolved once.
if (length(raw_data$printer) != 0) {
  # Use the printer layout stored with the data when there is one.
  chip_layout <- raw_data$printer
} else {
  # Otherwise use the two middle factors of the row count as rows x columns
  # (the single middle factor for both when the number of factors is odd).
  n_factors <- length(factors)
  if (n_factors %% 2 == 0) {
    rows <- factors[[n_factors / 2]]
    columns <- factors[[n_factors / 2 + 1]]
  } else {
    rows <- factors[[ceiling(n_factors / 2)]]
    columns <- factors[[ceiling(n_factors / 2)]]
  }
  chip_layout <- list(ngrid.r = 1, ngrid.c = 1, nspot.r = rows, nspot.c = columns)
}

# One pseudo-image per sample; the colour scale runs from 1.25 * min to
# 0.75 * max of that sample's intensities.
for (sample_name in colnames(raw_data$E)) {
  intensities <- raw_data$E[, sample_name]
  imageplot(
    intensities,
    layout = chip_layout,
    zlim = c(1.25 * min(intensities), 0.75 * max(intensities)),
    legend = TRUE,
    main = sample_name
  )
}
# Draw one MA plot per sample with limma's plotMA(); `array` selects the
# sample by its column position and the sample name is used as the title.
ma_sample_names <- colnames(raw_data$E)
for (array_idx in seq_along(ma_sample_names)) {
  suppressWarnings(
    limma::plotMA(
      raw_data,
      array = array_idx,
      xlab = "Average log-expression",
      ylab = "Expression log-ratio(this sample vs. others)",
      main = ma_sample_names[array_idx]
    )
  )
}
# Draw one foreground-vs-background (FB) plot per sample with limma's plotFB();
# `array` selects the sample by its column position.
fb_sample_names <- colnames(raw_data$E)
for (array_idx in seq_along(fb_sample_names)) {
  plotFB(
    raw_data,
    array = array_idx,
    xlab = "log2 Background",
    ylab = "log2 Foreground",
    main = fb_sample_names[array_idx]
  )
}
After making sure that the raw data looked good, I removed the technical variation in the raw intensities that comes from external sources (e.g., sample preparation) by normalizing the raw data. For this step, I used two more limma functions: backgroundCorrect(), which adjusts the foreground intensities for the background signal, and normalizeBetweenArrays(), which normalizes the data across arrays.
# Background-correct the raw intensities with limma's "normexp" method. The result is stored under a new name, so raw_data stays available for the raw-data exports later in the script.
corrected_data <- limma::backgroundCorrect(raw_data, method = "normexp") # Corrects intensities for background
## Array 1 corrected
## Array 2 corrected
## Array 3 corrected
## Array 4 corrected
## Array 5 corrected
## Array 6 corrected
## Array 7 corrected
## Array 8 corrected
## Array 9 corrected
## Array 10 corrected
## Array 11 corrected
## Array 12 corrected
## Array 13 corrected
## Array 14 corrected
## Array 15 corrected
## Array 16 corrected
## Array 17 corrected
## Array 18 corrected
## Array 19 corrected
## Array 20 corrected
## Array 21 corrected
## Array 22 corrected
## Array 23 corrected
## Array 24 corrected
## Array 25 corrected
## Array 26 corrected
## Array 27 corrected
## Array 28 corrected
## Array 29 corrected
## Array 30 corrected
## Array 31 corrected
## Array 32 corrected
## Array 33 corrected
## Array 34 corrected
## Array 35 corrected
## Array 36 corrected
## Array 37 corrected
## Array 38 corrected
## Array 39 corrected
## Array 40 corrected
## Array 41 corrected
## Array 42 corrected
## Array 43 corrected
## Array 44 corrected
## Array 45 corrected
## Array 46 corrected
## Array 47 corrected
## Array 48 corrected
## Array 49 corrected
## Array 50 corrected
## Array 51 corrected
## Array 52 corrected
## Array 53 corrected
## Array 54 corrected
## Array 55 corrected
## Array 56 corrected
## Array 57 corrected
## Array 58 corrected
## Array 59 corrected
## Array 60 corrected
# Input: background-corrected EListRaw, Output: EList
# Quantile-normalise across arrays. For an EListRaw input limma returns the
# expression values on the log2 scale, so norm_data$E must not be
# log-transformed again.
norm_data <- normalizeBetweenArrays(corrected_data, method = "quantile")

# Density curves of the normalised values for all arrays.
plotDensities(
  norm_data,
  log = TRUE,
  legend = "topright",
  main = "Density of normalized intensities for multiple arrays"
)

# One MA plot per normalised array, titled with the sample name.
norm_sample_names <- colnames(norm_data$E)
for (array_idx in seq_along(norm_sample_names)) {
  limma::plotMA(
    norm_data,
    array = array_idx,
    xlab = "Average Log-expression",
    ylab = "Expression Log-ratio (this sample vs. others)",
    main = norm_sample_names[array_idx]
  )
}

# Compare the raw and the normalised data on the same (log2) scale: only the
# raw intensities need log2(); the normalised values are plotted as they are.
boxplot(log2(raw_data$E))
boxplot(norm_data$E)
# Generating raw probe level data
# One column per sample holding the raw intensities, with the gene names
# supplied as row names. The column names come from the matrix itself.
raw_intensities_df <- as.data.frame(
  raw_data$E,
  row.names = raw_data$genes$GeneName
)

# Put the probe annotation columns in front of the intensities.
raw_probe_level_data_df <- cbind(raw_data$genes, raw_intensities_df)

# Export without a row-name column.
write.csv(
  raw_probe_level_data_df,
  file = file.path(dir, "/output_data/GLDS-174_output_data/Raw_probe_level_data-GLDS-174.csv"),
  row.names = FALSE
)
# Generating raw gene level data
# Collapse the probes to one row per GeneName and report, for every sample,
# the mean, standard deviation and median of its raw intensities. The result
# has the columns GeneName, <sample>_mean, <sample>_sd, <sample>_median, ...
sample_names_raw <- colnames(raw_data$E)

# Log the samples that are being summarised.
for (sample_name_raw in sample_names_raw) {
  print(sample_name_raw)
}

# A single grouped pass over all sample columns. across() names each result
# "<sample>_<statistic>", and all_of() selects the columns by their exact names.
summarized_columns_raw <- raw_probe_level_data_df %>%
  group_by(GeneName) %>%
  summarize(
    across(all_of(sample_names_raw), list(mean = mean, sd = sd, median = median)),
    .groups = "drop"
  ) %>%
  as.data.frame()

# Return a plain data frame sorted by GeneName with default row names.
summarized_columns_raw <- summarized_columns_raw[order(summarized_columns_raw$GeneName), , drop = FALSE]
rownames(summarized_columns_raw) <- NULL
## [1] "GSM1930654_1p1"
## [1] "GSM1930655_1p2"
## [1] "GSM1930656_1p3"
## [1] "GSM1930657_1p4"
## [1] "GSM1930658_1p5"
## [1] "GSM1930659_1p6"
## [1] "GSM1930660_3p1"
## [1] "GSM1930661_3p2"
## [1] "GSM1930662_3p3"
## [1] "GSM1930663_3p4"
## [1] "GSM1930664_3p5"
## [1] "GSM1930665_3p6"
## [1] "GSM1930666_4p1"
## [1] "GSM1930667_4p2"
## [1] "GSM1930668_4p3"
## [1] "GSM1930669_4p4"
## [1] "GSM1930670_4p5"
## [1] "GSM1930671_4p6"
## [1] "GSM1930672_6p1"
## [1] "GSM1930673_6p2"
## [1] "GSM1930674_6p3"
## [1] "GSM1930675_6p4"
## [1] "GSM1930676_6p5"
## [1] "GSM1930677_6p6"
## [1] "GSM1930678_7p1"
## [1] "GSM1930679_7p2"
## [1] "GSM1930680_7p3"
## [1] "GSM1930681_7p4"
## [1] "GSM1930682_7p5"
## [1] "GSM1930683_7p6"
## [1] "GSM1930684_8p1"
## [1] "GSM1930685_8p2"
## [1] "GSM1930686_8p3"
## [1] "GSM1930687_8p4"
## [1] "GSM1930688_8p5"
## [1] "GSM1930689_8p6"
## [1] "GSM1930690_9p1"
## [1] "GSM1930691_9p2"
## [1] "GSM1930692_9p3"
## [1] "GSM1930693_9p4"
## [1] "GSM1930694_9p5"
## [1] "GSM1930695_9p6"
## [1] "GSM1930696_10p1"
## [1] "GSM1930697_10p2"
## [1] "GSM1930698_10p3"
## [1] "GSM1930699_10p4"
## [1] "GSM1930700_10p5"
## [1] "GSM1930701_10p6"
## [1] "GSM1930702_11p1"
## [1] "GSM1930703_11p2"
## [1] "GSM1930704_11p3"
## [1] "GSM1930705_11p4"
## [1] "GSM1930706_11p5"
## [1] "GSM1930707_11p6"
## [1] "GSM1930708_12p1"
## [1] "GSM1930709_12p2"
## [1] "GSM1930710_12p3"
## [1] "GSM1930711_12p4"
## [1] "GSM1930712_12p5"
## [1] "GSM1930713_12p6"
# Save the gene-level summary of the raw data as CSV, without a row-name column.
write.csv(
  summarized_columns_raw,
  file = file.path(dir, "/output_data/GLDS-174_output_data/Raw_gene_level_data-GLDS-174.csv"),
  row.names = FALSE
)
# Generating normalized probe level data
# One column per sample holding the normalized values, with the gene names
# supplied as row names. The column names come from the matrix itself.
norm_intensities_df <- as.data.frame(
  norm_data$E,
  row.names = norm_data$genes$GeneName
)

# Put the probe annotation columns in front of the normalized values.
norm_probe_level_data_df <- cbind(norm_data$genes, norm_intensities_df)

# Export without a row-name column.
write.csv(
  norm_probe_level_data_df,
  file = file.path(dir, "/output_data/GLDS-174_output_data/Normalized_probe_level_data-GLDS-174.csv"),
  row.names = FALSE
)
# Generating normalized gene level data
# Collapse the probes to one row per GeneName and report, for every sample,
# the mean, standard deviation and median of its normalized values. The result
# has the columns GeneName, <sample>_mean, <sample>_sd, <sample>_median, ...
sample_names_norm <- colnames(norm_data$E)

# Log the samples that are being summarised.
for (sample_name_norm in sample_names_norm) {
  print(sample_name_norm)
}

# A single grouped pass over all sample columns. across() names each result
# "<sample>_<statistic>", and all_of() selects the columns by their exact names.
summarized_columns_norm <- norm_probe_level_data_df %>%
  group_by(GeneName) %>%
  summarize(
    across(all_of(sample_names_norm), list(mean = mean, sd = sd, median = median)),
    .groups = "drop"
  ) %>%
  as.data.frame()

# Return a plain data frame sorted by GeneName with default row names.
summarized_columns_norm <- summarized_columns_norm[order(summarized_columns_norm$GeneName), , drop = FALSE]
rownames(summarized_columns_norm) <- NULL
## [1] "GSM1930654_1p1"
## [1] "GSM1930655_1p2"
## [1] "GSM1930656_1p3"
## [1] "GSM1930657_1p4"
## [1] "GSM1930658_1p5"
## [1] "GSM1930659_1p6"
## [1] "GSM1930660_3p1"
## [1] "GSM1930661_3p2"
## [1] "GSM1930662_3p3"
## [1] "GSM1930663_3p4"
## [1] "GSM1930664_3p5"
## [1] "GSM1930665_3p6"
## [1] "GSM1930666_4p1"
## [1] "GSM1930667_4p2"
## [1] "GSM1930668_4p3"
## [1] "GSM1930669_4p4"
## [1] "GSM1930670_4p5"
## [1] "GSM1930671_4p6"
## [1] "GSM1930672_6p1"
## [1] "GSM1930673_6p2"
## [1] "GSM1930674_6p3"
## [1] "GSM1930675_6p4"
## [1] "GSM1930676_6p5"
## [1] "GSM1930677_6p6"
## [1] "GSM1930678_7p1"
## [1] "GSM1930679_7p2"
## [1] "GSM1930680_7p3"
## [1] "GSM1930681_7p4"
## [1] "GSM1930682_7p5"
## [1] "GSM1930683_7p6"
## [1] "GSM1930684_8p1"
## [1] "GSM1930685_8p2"
## [1] "GSM1930686_8p3"
## [1] "GSM1930687_8p4"
## [1] "GSM1930688_8p5"
## [1] "GSM1930689_8p6"
## [1] "GSM1930690_9p1"
## [1] "GSM1930691_9p2"
## [1] "GSM1930692_9p3"
## [1] "GSM1930693_9p4"
## [1] "GSM1930694_9p5"
## [1] "GSM1930695_9p6"
## [1] "GSM1930696_10p1"
## [1] "GSM1930697_10p2"
## [1] "GSM1930698_10p3"
## [1] "GSM1930699_10p4"
## [1] "GSM1930700_10p5"
## [1] "GSM1930701_10p6"
## [1] "GSM1930702_11p1"
## [1] "GSM1930703_11p2"
## [1] "GSM1930704_11p3"
## [1] "GSM1930705_11p4"
## [1] "GSM1930706_11p5"
## [1] "GSM1930707_11p6"
## [1] "GSM1930708_12p1"
## [1] "GSM1930709_12p2"
## [1] "GSM1930710_12p3"
## [1] "GSM1930711_12p4"
## [1] "GSM1930712_12p5"
## [1] "GSM1930713_12p6"
# Save the gene-level summary of the normalized data as CSV, without a row-name column.
write.csv(
  summarized_columns_norm,
  file = file.path(dir, "/output_data/GLDS-174_output_data/Normalized_gene_level_data-GLDS-174.csv"),
  row.names = FALSE
)